Installation

source("https://bioconductor.org/biocLite.R")
source("http://bioconductor.org/workflows.R")
workflowInstall("rnaseqGene")
biocLite("GO.db")
biocLite("org.Hs.eg.db")
biocLite("airway")
biocLite("DESeq2")

Read in data

Starting from SummarizedExperiment

library(rnaseqGene)
library("airway")
data("airway")
se <- airway
se$dex <- relevel(se$dex, "untrt")
se$dex
## [1] untrt trt   untrt trt   untrt trt   untrt trt  
## Levels: untrt trt
library("DESeq2")
dds <- DESeqDataSet(se, design = ~ cell + dex)

Pre-filtering the dataset

nrow(dds)
## [1] 64102
dds <- dds[ rowSums(counts(dds)) > 1, ]
nrow(dds)
## [1] 29391

The rlog transformation

rld <- rlog(dds, blind=FALSE)
head(assay(rld), 3)
##                 SRR1039508 SRR1039509 SRR1039512 SRR1039513 SRR1039516 SRR1039517 SRR1039520
## ENSG00000000003   9.385683   9.052608   9.516875   9.285338   9.839085   9.530311   9.663255
## ENSG00000000419   8.869616   9.138271   9.036116   9.075295   8.972126   9.131824   8.861534
## ENSG00000000457   7.961369   7.881385   7.824079   7.921490   7.751699   7.886441   7.957121
##                 SRR1039521
## ENSG00000000003   9.277699
## ENSG00000000419   9.060905
## ENSG00000000457   7.912123
par( mfrow = c( 1, 2 ) )
dds <- estimateSizeFactors(dds)
plot(log2(counts(dds, normalized=TRUE)[,1:2] + 1),
     pch=16, cex=0.3)
plot(assay(rld)[,1:2],
     pch=16, cex=0.3)

Differential expression analysis

dds <- DESeq(dds)
(res <- results(dds))
## log2 fold change (MAP): dex trt vs untrt 
## Wald test p-value: dex trt vs untrt 
## DataFrame with 29391 rows and 6 columns
##                    baseMean log2FoldChange      lfcSE       stat      pvalue       padj
##                   <numeric>      <numeric>  <numeric>  <numeric>   <numeric>  <numeric>
## ENSG00000000003 708.6021697    -0.37415193 0.09884432 -3.7852648 0.000153545 0.00128686
## ENSG00000000419 520.2979006     0.20206144 0.10974240  1.8412340 0.065587276 0.19676183
## ENSG00000000457 237.1630368     0.03616620 0.13834538  0.2614196 0.793768939 0.91372953
## ENSG00000000460  57.9326331    -0.08446385 0.24990676 -0.3379815 0.735377161 0.88385059
## ENSG00000000938   0.3180984    -0.08413904 0.15133427 -0.5559814 0.578223585         NA
## ...                     ...            ...        ...        ...         ...        ...
## ENSG00000273485   1.2864477     0.03398815  0.2932360  0.1159071   0.9077261         NA
## ENSG00000273486  15.4525365    -0.09560732  0.3410333 -0.2803460   0.7792120  0.9062268
## ENSG00000273487   8.1632350     0.55007412  0.3725061  1.4766847   0.1397602  0.3389275
## ENSG00000273488   8.5844790     0.10515293  0.3683834  0.2854442   0.7753038  0.9039857
## ENSG00000273489   0.2758994     0.06947900  0.1512520  0.4593591   0.6459763         NA

Converting ENSEMBL IDs to GO IDs

library(GO.db)
library(org.Hs.eg.db)
res$go_id <- mapIds(org.Hs.eg.db,
                    keys=rownames(res),
                    column="GO",
                    keytype="ENSEMBL",
                    multiVals="first")

Fitlering results

Remove genes with no GO annotation

res <- res[!is.na(res$go_id), ]
res <- res[res$go_id %in% keys(org.Hs.eg.db, keytype = "GO"), ]

Remove insignificant genes

res <- res[(! is.na(res$padj)) & res$padj <= 0.05, ]

Remove genes with small changes

res <-  res[abs(res$log2FoldChange) >= 0.5, ]

Getting classificaiton

term_class <- function(x, current = x, all_paths = TRUE, type = GOCCPARENTS) {
  # Get immediate children of current taxon
  parents = tryCatch({
    names(AnnotationDbi::Term(as.list(type[x[1]])[[1]]))
  }, error = function(e) {
    c()
  })
  
  # only go down one path if desired
  if (! all_paths) {
    parents <- parents[1]
  }
  parents <- parents[parents != "all"]
  
  if (is.null(parents)) {
    return(c())
  } else if (length(parents) == 0) {
    return(paste0(collapse = "|", AnnotationDbi::Term(x)))
  } else {
    next_x <- lapply(parents, function(y) c(y, x))
    
    # Run this function on them to get their output
    child_output <- lapply(next_x, term_class, all_paths = all_paths, type = type)
    output <- unlist(child_output, recursive = FALSE)
    
    return(output)
  }
}
cc_class <- lapply(res$go_id, term_class, all_paths = FALSE, type = GOCCPARENTS)
mf_class <- lapply(res$go_id, term_class, all_paths = FALSE, type = GOMFPARENTS)
bp_class <- lapply(res$go_id, term_class, all_paths = FALSE, type = GOBPPARENTS)

Cellular component

cc_res <- res[rep(1:nrow(res), sapply(cc_class, length)), ]
cc_res$class <- unlist(cc_class)
library(metacoder)
data <- parse_taxonomy_table(input = as.data.frame(cc_res),
                             taxon_col = c("class" = -1),
                             other_col_type = "obs_info",
                             class_sep = "\\|")
data$taxon_funcs <- c(data$taxon_funcs,
                      change = function(x, subset = NULL) {
                        vapply(obs(x),
                               function(i) mean(data$obs_data[i, ]$log2FoldChange, na.rm = TRUE),
                               numeric(1))                      })
data %>%
  heat_tree(node_label = name,
            node_size = n_obs,
            node_color = change,
            node_color_trans = "linear",
            node_color_range = diverging_palette(),
            node_color_interval = c(-1, 1),
            edge_color_trans = "linear",
            edge_color_range = diverging_palette(),
            edge_color_interval = c(-1, 1),
            node_label_max = 300,
            output_file = file.path(output_folder,
                                    paste0("gene_expression--cellular_component",
                                           output_format)))

Biological Process

bp_res <- res[rep(1:nrow(res), sapply(bp_class, length)), ]
bp_res$class <- unlist(bp_class)
library(metacoder)
data <- parse_taxonomy_table(input = as.data.frame(bp_res),
                             taxon_col = c("class" = -1),
                             other_col_type = "obs_info",
                             class_sep = "\\|")
data$taxon_funcs <- c(data$taxon_funcs,
                      change = function(x, subset = NULL) {
                        vapply(obs(x),
                               function(i) mean(data$obs_data[i, ]$log2FoldChange, na.rm = TRUE),
                               numeric(1))                      })
data %>%
  heat_tree(node_label = name,
            node_size = n_obs,
            node_color = change,
            node_color_trans = "linear",
            node_color_range = diverging_palette(),
            node_color_interval = c(-1, 1),
            edge_color_trans = "linear",
            edge_color_range = diverging_palette(),
            edge_color_interval = c(-1, 1),
            node_label_max = 300,
            output_file = file.path(output_folder,
                                    paste0("gene_expression--biological_process",
                                           output_format)))

Molecular Function

mf_res <- res[rep(1:nrow(res), sapply(mf_class, length)), ]
mf_res$class <- unlist(mf_class)
library(metacoder)
data <- parse_taxonomy_table(input = as.data.frame(mf_res),
                             taxon_col = c("class" = -1),
                             other_col_type = "obs_info",
                             class_sep = "\\|")
data$taxon_funcs <- c(data$taxon_funcs,
                      change = function(x, subset = NULL) {
                        vapply(obs(x),
                               function(i) mean(data$obs_data[i, ]$log2FoldChange, na.rm = TRUE),
                               numeric(1))                      })
data %>%
  heat_tree(node_label = name,
            node_size = n_obs,
            node_color = change,
            node_color_trans = "linear",
            node_color_range = diverging_palette(),
            node_color_interval = c(-1, 1),
            edge_color_trans = "linear",
            edge_color_range = diverging_palette(),
            edge_color_interval = c(-1, 1),
            node_label_max = 300,
            output_file = file.path(output_folder,
                                    paste0("gene_expression--molecular_function",
                                           output_format)))

Comments